# 作者： wzq
# 2025年02月02日22时06分47秒
import faker as faker
# from faker import Faker
# import csv
# import random
#
# # 初始化Faker生成器（生成英文数据）
# fake = Faker()
#
# # 创建空数据集
# data = []
# for i in range(1, 101):
#     name = fake.name()
#     addr = fake.city()  # city name only, e.g. "New York" (no state abbreviation is appended here)
#     age = random.randint(18, 80)
#     sex = random.choice(["M", "F"])
#     data.append([i, name, addr, age, sex])
#
# # 写入CSV文件
# with open("tableau_practice_data.csv", "w", newline="", encoding="utf-8") as file:
#     writer = csv.writer(file)
#     writer.writerow(["id", "name", "addr", "age", "sex"])  # 列名
#     writer.writerows(data)
#
# print("CSV文件已生成！")
#
from faker import Faker
import csv
import random

# Generate a 100-row practice CSV (id, name, addr, age, sex) in which 20 of the
# ids are taken from the sales_data ID pool and the remaining 80 from 1-100.

# Initialise the Faker generator (English data)
fake = Faker()

# ID pool of the second table (sales_data): the first 20 multiples of 111
sales_ids = [i * 111 for i in range(1, 21)]  # [111, 222, ..., 2220]

# Original IDs 1-100, shuffled so the 80 kept below are a random subset
original_ids = list(range(1, 101))
random.shuffle(original_ids)

# Mixed pool of 100 IDs: all 20 sales_data IDs + 80 of the original IDs.
# Shuffled again so the sales_data IDs are not grouped at the start.
mixed_ids = sales_ids + original_ids[:80]
random.shuffle(mixed_ids)

# One row per ID. The fields are evaluated left to right, so the sequence of
# Faker / random calls per row is: name, city, state_abbr, randint, choice.
data = [
    [
        id_value,
        fake.name(),
        fake.city() + ", " + fake.state_abbr(),  # "City, ST"
        random.randint(18, 80),                  # age, both bounds inclusive
        random.choice(["M", "F"]),
    ]
    for id_value in mixed_ids
]

# Write the header row followed by all data rows.
# newline="" lets the csv module control line endings itself.
with open("tableau_practice_data_v2.csv", "w", newline="", encoding="utf-8") as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(["id", "name", "addr", "age", "sex"])
    writer.writerows(data)

print("新版CSV文件已生成！")